clear

*set the working directory to the replication folder; edit this path for your own machine
*capture suppresses the error if the folder does not exist, so later relative paths
*are then resolved against whatever the current directory happens to be
capture cd "C:\Users\jamesCabral\OneDrive - DAZ\CGH_Empirical_Replication"


*********************************************
*year-by-year regressions for the AHTUS data*
*********************************************

*dependent variables for the regressions, in the order used to index the results
*the list is built in three steps only to keep the lines short
local varlist other_income job_search childcare nonmkt_work core_home homeowner
local varlist `varlist' obtaings otherscare leisure tv socializing esp
local varlist `varlist' leisure_noesp other_leisure other education civic medical

*load the AHTUS state panel
use "data\ATUS_AHTUS_state_datasets\AHTUS_all.dta"

*attach the population weights; the using file has one row per idn
merge m:1 idn using "data\uncertainty_popweights\state_pop_shares_AHTUS.dta"

*the survey years 1993, 1995 and 1998 are unevenly spaced, so map them to 1, 2, 3
*this lets the D. operator compare consecutive survey years
recode sample (1993=1) (1995=2) (1998=3), gen(yr_num)

*declare the panel structure: idn is the panel id, yr_num the time index
xtset idn yr_num

*first-difference regressions for the AHTUS panel
*yr_num 2 versus 1 is 1995 versus 1993, and yr_num 3 versus 2 is 1998 versus 1995
*the D. operator together with the if condition on yr_num selects one pair of survey years at a time

*number of dependent variables, taken from the list itself instead of a hard-coded count
local n_depvars : word count `varlist'

forvalues i=2/3 {

	*m is the position of the dependent variable in the list
	*it becomes the suffix of the saved variables and is later used to reshape the data
	forvalues m = 1/`n_depvars' {
		local dep_var: word `m' of `varlist'
		reg D.`dep_var' D.paid_work D.male D.black D.hv_child D.age1 D.age2 D.age3 D.age4 D.age5 D.educ1 D.educ2 D.educ3 D.educ4  [aw=popweight]  if (yr_num == `i')

		*copy the results table right away so that no later command can overwrite it
		*column 1 belongs to D.paid_work, the first regressor
		matrix reg_table = r(table)

		*store the paid-work coefficient scaled by -100 as a constant variable
		gen beta`i'`m' = _b[D.paid_work]*-100

		*rows 5 and 6 of the table are the lower and upper confidence limits
		*scaling by a negative number reverses the interval, so the scaled upper limit
		*comes from the original lower limit (row 5) and the scaled lower limit from row 6
		gen ul`i'`m' = reg_table[5,1]*-100
		gen ll`i'`m' = reg_table[6,1]*-100

		*keep the estimates under a name built from the year index and the variable name
		*they are needed for the cross-year coefficient tests
		est store beta`i'`dep_var'
	}

}


*test whether the paid-work coefficient is the same in the two stored AHTUS regressions
*(year index 2 versus year index 3) for three of the dependent variables
*suest combines the two stored estimates with idn as the cluster variable
foreach outcome in nonmkt_work leisure other {
	local est_a beta2`outcome'
	local est_b beta3`outcome'
	quietly suest `est_a' `est_b', cluster(idn)
	*after suest the regression coefficients are addressed through the _mean equations
	test [`est_a'_mean = `est_b'_mean]: D.paid_work
}

*prepare the AHTUS output file
*the beta, ul and ll variables are constants that take the same value on every row,
*so a single observation is enough and no particular idn or sample code is required
keep in 1
keep beta* ul* ll*

*stubs to reshape: estimates exist only for year indexes 2 and 3
*(the regression loop runs over 2/3), so no placeholder stubs for index 1 are listed
local betavarlist
forvalues i=2/3 {
	local betavarlist `betavarlist' beta`i' ul`i' ll`i'
}

*reshape the data so that rows correspond to time use categories
*i_var is a temporary constant id required by reshape
gen i_var = 1
reshape long `betavarlist', i(i_var) j(timeuse)
drop i_var

*replace the year indexes by the survey years they stand for (2 is 1995, 3 is 1998)
*explicit names are used so that no unrelated variable can be caught by a wildcard
rename (beta2 ul2 ll2) (beta1995 ul1995 ll1995)
rename (beta3 ul3 ll3) (beta1998 ul1998 ll1998)

*intermediate file, merged into the ATUS results further down
save "data\synthetic_series\AHTUS_betas.dta", replace

********************************************
*year-by-year regressions for the ATUS data*
********************************************

*start from an empty dataset and read the state-by-year spreadsheet
*firstrow takes the variable names from the first row of the sheet
clear
import excel "data\ATUS_AHTUS_state_datasets\state_dataset_year", firstrow

*attach the population weights; the using file has one row per idn
merge m:1 idn using "data\uncertainty_popweights\state_pop_shares.dta"

*the regressions refer to the weight as popweight
gen popweight = popweight_2010

*shorten two long variable names
rename homeproductionpaper homeprodpaper
rename otherleisurepaper othleispaper

*dependent variables for the regressions, in the order used to index the results
*the list is built in three steps only to keep the lines short
local varlist workapaper workupaper childcarepaper homepaper homeprodpaper homeownpaper
local varlist `varlist' shoppingpaper othercarepaper leisurepaper tvpaper socializingpaper sleepingpaper
local varlist `varlist' eppaper othleispaper otherpaper educationpaper civicpaper ownmedicalpaper

*declare the panel structure: idn is the panel id, year the yearly time index
xtset idn year, yearly

*first-difference regressions for the ATUS panel
*the D. operator takes the change from the previous year, and the if condition selects
*the later of the two years being compared

*number of dependent variables, taken from the list itself instead of a hard-coded count
local n_depvars : word count `varlist'

forvalues i=2004/2019 {

	*m is the position of the dependent variable in the list
	*it becomes the suffix of the saved variables and is later used to reshape the data
	forvalues m = 1/`n_depvars' {
		local dep_var: word `m' of `varlist'
		reg D.`dep_var' D.workpaper D.male D.black D.married D.hvchild D.age1 D.age2 D.age3 D.age4 D.age5 D.educ1 D.educ2 D.educ3 D.educ4  [aw=popweight] if (year == `i'), vce(cluster idn)

		*copy the results table right away so that no later command can overwrite it
		*column 1 belongs to D.workpaper, the first regressor
		matrix reg_table = r(table)

		*store the paid-work coefficient scaled by -100 as a constant variable
		gen beta`i'`m' = _b[D.workpaper]*-100

		*rows 5 and 6 of the table are the lower and upper confidence limits
		*scaling by a negative number reverses the interval, so the scaled upper limit
		*comes from the original lower limit (row 5) and the scaled lower limit from row 6
		gen ul`i'`m' = reg_table[5,1]*-100
		gen ll`i'`m' = reg_table[6,1]*-100
	}

}

*************************
*prepare the output file*
*************************

*the saved results are constants, so keep a single observation and only the result variables
keep if (idn == 1) & (year == 2004)
keep beta* ul* ll*

*collect the reshape stubs: one beta, ul and ll stub per year
local stubs
forvalues yr = 2004/2019 {
	local stubs `stubs' beta`yr' ul`yr' ll`yr'
}

*reshape so that each row is one time use category
*row_id is a temporary constant id required by reshape
gen row_id = 1
reshape long `stubs', i(row_id) j(timeuse)
drop row_id

*bring in the AHTUS results, matched on the time use index
*nogenerate skips creating the _merge indicator
merge m:1 timeuse using "data\synthetic_series\AHTUS_betas.dta", nogenerate

*attach descriptive labels to the numeric time use index
*the index is the position of each variable in the dependent-variable lists
*NOTE(review): positions 12 and 13 are esp and leisure_noesp in the AHTUS list but
*sleepingpaper and eppaper in the ATUS list - confirm that these labels fit both sources
label define timeuse_lbl ///
	1 "Other Income Generating Activities" ///
	2 "Job Search" ///
	3 "Child Care" ///
	4 "Nonmarket Work" ///
	5 "Core Home Production" ///
	6 "Homeownership Activities" ///
	7 "Obtaining Goods and Services" ///
	8 "Others Care" ///
	9 "Leisure" ///
	10 "TV Watching" ///
	11 "Socializing" ///
	12 "Sleeping" ///
	13 "Eating and Personal Care" ///
	14 "Other Leisure" ///
	15 "Other" ///
	16 "Education" ///
	17 "Civic and Religious Activities" ///
	18 "Own Medical Care"
label values timeuse timeuse_lbl

*only the point estimates are exported; they are used as an input to the Matlab codes
*the confidence limit columns are dropped here
keep timeuse beta*

*put the two AHTUS years ahead of the ATUS years
order timeuse beta1995 beta1998

*one row per time use category and year, with the year taken from the variable suffix
reshape long beta, i(timeuse) j(year)

*the intermediate AHTUS file has been merged in and is no longer needed
erase "data\synthetic_series\AHTUS_betas.dta"

*write the final series
export delimited using "data\synthetic_series\ATUS_AHTUS_betas.csv", replace